Leakage coefficient fit

This notebook estracts the leakage coefficient from multi-spot smFRET measurements of 4 dsDNA samples.

Import software


In [ ]:
from fretbursts import *
sns = init_notebook()

In [ ]:
import seaborn

In [ ]:
import os
import pandas as pd
from IPython.display import display
from IPython.display import display, Math

Data files


In [ ]:
data_dir = './data/multispot/'

In [ ]:
data_dir = os.path.abspath(data_dir) + '/'
assert os.path.exists(data_dir), "Path '%s' does not exist." % data_dir

In [ ]:
from glob import glob
file_list = sorted(glob(data_dir + '*.hdf5'))

In [ ]:
labels = ['7d', '12d', '17d', '22d', '27d', 'DO']
files_dict = {lab: fname for lab, fname in zip(sorted(labels), file_list)}
files_dict

Plot style


In [ ]:
PLOT_DIR = './figure/'

In [ ]:
import matplotlib as mpl
from cycler import cycler

bmap = sns.color_palette("Set1", 9)
colors = np.array(bmap)[(1,0,2,3,4,8,6,7), :]
colors_labels = ['blue', 'red', 'green', 'violet', 'orange', 'gray', 'brown', 'pink', ]
for c, cl in zip(colors, colors_labels):
    locals()[cl] = tuple(c) # assign variables with color names

mpl.rcParams['axes.prop_cycle'] = cycler('color', colors)
sns.palplot(colors)

Analysis Parameters


In [ ]:
## Background fit
bg_kwargs_auto = dict(fun=bg.exp_fit,
                 time_s = 30,
                 tail_min_us = 'auto',
                 F_bg=1.7,
                 )

## Burst search
F = 6
ph_sel = Ph_sel(Dex='Dem') 
#ph_sel = Ph_sel('all') 
size_min = 80

## D-only peak fit with KDE
bandwidth = 0.03
binwidth = 0.025
E_range_do = (-0.05, 0.1)
weights = 'size'

In [ ]:
df = pd.DataFrame(index=['7d', '12d', '17d', 'DO'], columns=range(8), dtype=float)
df.index.name = 'Sample'
df.columns.name = 'Channel'
E_do = df.copy()
E_do_g = df.copy()
nbursts = df.copy()

Utility functions


In [ ]:
def print_fit_report(E_pr, gamma=1, leakage=0, dir_ex_t=0, math=True):
    """Print fit and standard deviation for both corrected and uncorrected E
    Returns d.E_fit.
    """
    E_corr = fretmath.correct_E_gamma_leak_dir(E_pr, gamma=gamma, leakage=leakage, dir_ex_t=dir_ex_t)
    
    E_pr_mean = E_pr.mean()*100
    E_pr_delta = (E_pr.max() - E_pr.min())*100
    
    E_corr_mean = E_corr.mean()*100
    E_corr_delta = (E_corr.max() - E_corr.min())*100
    if math:
        display(Math(r'\text{Pre}\;\gamma\quad\langle{E}_{fit}\rangle = %.1f\%% \qquad'
                     '\Delta E_{fit} = %.2f \%%' % \
                     (E_pr_mean, E_pr_delta)))
        display(Math(r'\text{Post}\;\gamma\quad\langle{E}_{fit}\rangle = %.1f\%% \qquad'
                     '\Delta E_{fit} = %.2f \%%' % \
                     (E_corr_mean, E_corr_delta)))
    else:
        print('Pre-gamma  E (delta, mean):  %.2f  %.2f' % (E_pr_mean, E_pr_delta))
        print('Post-gamma E (delta, mean):  %.2f  %.2f' % (E_corr_mean, E_corr_delta))

7bp sample


In [ ]:
str(ph_sel)

In [ ]:
data_id = '7d'
d7 = loader.photon_hdf5(files_dict[data_id])
d7.calc_bg(**bg_kwargs_auto)

In [ ]:
d7.burst_search(m=10, F=F, ph_sel=ph_sel)

In [ ]:
ds7 = d7.select_bursts(select_bursts.nd, th1=size_min)
dx = ds7

In [ ]:
## KDE Fit
E_do.loc[data_id] = bext.fit_bursts_kde_peak(dx, bandwidth=bandwidth, x_range=E_range_do,  
                                             weights=weights)

## Gaussian fit
dx.E_fitter.histogram(binwidth=binwidth, weights=weights)
dx.E_fitter.fit_histogram(mfit.factory_gaussian())
E_do_g.loc[data_id] = dx.E_fitter.params['center']

## D-only selection
do_s = dx.select_bursts(select_bursts.E, E2=0.1)
nbursts.loc[data_id] = do_s.num_bursts

In [ ]:
dplot(dx, hist_fret, binwidth=binwidth, weights=weights, show_kde=True, show_kde_peak=True, show_fit_value=True);
plt.xlim(xmax = 0.49)
print_fit_report(E_do.loc[data_id])

In [ ]:
dplot(dx, hist_fret, binwidth=binwidth, weights=weights, show_model=True, show_fit_value=True, fit_from='center');
plt.xlim(xmax = 0.49)
print_fit_report(E_do_g.loc[data_id])

Alternative plots


In [ ]:
fig, axes = plt.subplots(4, 2, figsize=(12, 8), sharex=True, sharey=True)
fig.subplots_adjust(left=0.08, right=0.96, top=0.93, bottom=0.07,
                    wspace=0.06, hspace=0.18)

for ich, ax in enumerate(axes.ravel()):
    mfit.plot_mfit(dx.E_fitter, ich=ich, ax=ax, plot_model=False, plot_kde=True)
plt.xlim(-0.2, 0.3)
print_fit_report(E_do.loc[data_id])

In [ ]:
fig, axes = plt.subplots(4, 2, figsize=(12, 8), sharex=True, sharey=True)
fig.subplots_adjust(left=0.08, right=0.96, top=0.93, bottom=0.07,
                    wspace=0.06, hspace=0.18)

for ich, ax in enumerate(axes.ravel()):
    mfit.plot_mfit(dx.E_fitter, ich=ich, ax=ax)
plt.xlim(-0.2, 0.3)
print_fit_report(E_do_g.loc[data_id])

12bp sample


In [ ]:
data_id = '12d'
d12 = loader.photon_hdf5(files_dict[data_id])
d12.calc_bg(**bg_kwargs_auto)

In [ ]:
d12.burst_search(m=10, F=F, ph_sel=ph_sel)

In [ ]:
ds12 = d12.select_bursts(select_bursts.nd, th1=size_min)
dx = ds12

In [ ]:
## KDE Fit
E_do.loc[data_id] = bext.fit_bursts_kde_peak(dx, bandwidth=bandwidth, x_range=E_range_do,  
                                             weights=weights)

## Gaussian fit
dx.E_fitter.histogram(binwidth=binwidth, weights=weights)
dx.E_fitter.fit_histogram(mfit.factory_gaussian())
E_do_g.loc[data_id] = dx.E_fitter.params['center']

## D-only selection
do_s = dx.select_bursts(select_bursts.E, E2=0.1)
nbursts.loc[data_id] = do_s.num_bursts

In [ ]:
dplot(dx, hist_fret, binwidth=binwidth, weights=weights, show_kde=True, show_kde_peak=True, show_fit_value=True);
plt.xlim(xmax = 0.49)
print_fit_report(E_do.loc[data_id])

In [ ]:
dplot(dx, hist_fret, binwidth=binwidth, weights=weights, show_model=True, show_fit_value=True, fit_from='center');
plt.xlim(xmax = 0.49)
print_fit_report(E_do_g.loc[data_id])

17bp sample


In [ ]:
data_id = '17d'
d17 = loader.photon_hdf5(files_dict[data_id])
d17.calc_bg(**bg_kwargs_auto)

In [ ]:
d17.burst_search(m=10, F=F, ph_sel=ph_sel)

In [ ]:
ds17 = d17.select_bursts(select_bursts.nd, th1=size_min)
dx = ds17

In [ ]:
## KDE Fit
E_do.loc[data_id] = bext.fit_bursts_kde_peak(dx, bandwidth=bandwidth, x_range=E_range_do,  
                                             weights=weights)

## Gaussian fit
dx.E_fitter.histogram(binwidth=binwidth, weights=weights)
dx.E_fitter.fit_histogram(mfit.factory_two_gaussians(p1_center=0.03, p2_center=0.25))
E_do_g.loc[data_id] = dx.E_fitter.params['p1_center']

## D-only selection
do_s = Sel(dx, select_bursts.E, E2=0.1)
nbursts.loc[data_id] = do_s.num_bursts

In [ ]:
dplot(dx, hist_fret, binwidth=binwidth, weights=weights, show_kde=True, show_kde_peak=True, show_fit_value=True);
plt.xlim(xmax = 0.49)
print_fit_report(E_do.loc[data_id])

In [ ]:
dplot(dx, hist_fret, binwidth=binwidth, weights=weights, show_model=True, show_fit_value=True, fit_from='p1_center');
plt.xlim(xmax = 0.49)
print_fit_report(E_do_g.loc[data_id])

DO sample


In [ ]:
data_id = 'DO'
do = loader.photon_hdf5(files_dict[data_id])
do.calc_bg(**bg_kwargs_auto)

In [ ]:
do.burst_search(L=10, m=10, F=F, ph_sel=ph_sel)

In [ ]:
dos = do.select_bursts(select_bursts.nd, th1=size_min)
dx = dos

In [ ]:
## KDE Fit
E_do.loc[data_id] = bext.fit_bursts_kde_peak(dx, bandwidth=bandwidth, x_range=E_range_do,  
                                             weights=weights)

## Gaussian fit
dx.E_fitter.histogram(binwidth=binwidth, weights=weights)
dx.E_fitter.fit_histogram(mfit.factory_gaussian())
E_do_g.loc[data_id] = dx.E_fitter.params['center']

## D-only selection
nbursts.loc[data_id] = dx.num_bursts

In [ ]:
dplot(dx, hist_fret, binwidth=binwidth, weights=weights, show_kde=True, show_kde_peak=True, show_fit_value=True);
plt.xlim(xmax = 0.49)
print_fit_report(E_do.loc[data_id])

In [ ]:
dplot(dx, hist_fret, binwidth=binwidth, weights=weights, show_model=True, show_fit_value=True, fit_from='center');
plt.xlim(xmax = 0.49)
print_fit_report(E_do_g.loc[data_id])

Results


In [ ]:
ph_sel

In [ ]:
nbursts = nbursts.astype(int)
nbursts

In [ ]:
E_do_kde = E_do.copy()

In [ ]:
leakage_kde = (E_do_kde / (1 - E_do_kde)).round(6)
leakage_kde

In [ ]:
leakage_gauss = (E_do_g / (1 - E_do_g)).round(6)
leakage_gauss

In [ ]:
sns.set(style='ticks', font_scale=1.4)

In [ ]:
colors = sns.color_palette('Paired', 8)
mpl.rcParams['axes.prop_cycle'] = cycler('color', colors)
sns.palplot(colors)

In [ ]:
fig, ax = plt.subplots()
ax2 = ax.twinx()

kws = dict(lw=2, marker='o', ms=8)
for i, did in enumerate(('7d', '12d', '17d', 'DO')):
    (100*leakage_kde).loc[did].plot(label='%s KDE' % did, ax=ax, color=colors[1+i*2], **kws)
    nbursts.loc[did].plot(ax=ax2, ls='--', lw=2.5, color=colors[1+i*2])

for i, did in enumerate(('7d', '12d', '17d', 'DO')):    
    (100*leakage_gauss).loc[did].plot(label='%s Gauss' % did, ax=ax, color=colors[i*2], **kws)
    
handles, lab = ax.get_legend_handles_labels()
h = handles#[1::2] + handles[::2]
l = lab[1::2] + lab[::2]
ax.legend(ncol=2, loc=1, bbox_to_anchor=(1, 0.5), borderaxespad=0.)
ax.set_ylim(0)

ax2.set_ylim(0, 3200)
plt.xlim(-0.25, 7.25)
plt.xlabel('Channel')
ax.set_ylabel('Leakage %')
ax2.set_ylabel('# Bursts')
sns.despine(offset=10, trim=True, right=False)

Save results


In [ ]:
leakage_kde.to_csv('results/Multi-spot - leakage coefficient all values KDE %s.csv' % str(ph_sel))
leakage_gauss.to_csv('results/Multi-spot - leakage coefficient all values gauss %s.csv' % str(ph_sel))
nbursts.to_csv('results/Multi-spot - leakage coefficient all values nbursts %s.csv' % str(ph_sel))

Average leakage

Mean per sample:


In [ ]:
lk_s = pd.DataFrame(index=['mean', 'std'], columns=E_do.index)

lk_s.loc['mean'] = leakage_kde.mean(1)*100
lk_s.loc['std'] = leakage_kde.std(1)*100

lk_s['mean'] = lk_s.mean(1)
lk_s

In [ ]:
lk_s.T[['std']]

In [ ]:
lk_s.T[['mean']].plot(table=True)
plt.gca().xaxis.set_visible(False)   # Hide Ticks

Mean per sample (weighted on the number of bursts):

Number of bursts in D-only population:


In [ ]:
nbursts

In [ ]:
lk_sw = pd.DataFrame(index=['mean', 'std'], columns=E_do.index)

lk_sw.loc['mean'] = (nbursts*leakage_kde).sum(1)/nbursts.sum(1)*100
lk_sw.loc['std'] = (nbursts*leakage_kde).std(1)/nbursts.sum(1)*100

lk_sw['mean'] = (nbursts.sum(1)*lk_sw).sum(1)/nbursts.sum(1).sum()
lk_sw

In [ ]:
lk_sw.loc['mean'].plot()
ylim(2, 4)

Mean per channel:


In [ ]:
lk_c = pd.DataFrame(index=['mean', 'std'], columns=E_do.columns)

lk_c.loc['mean'] = leakage_kde.mean()*100
lk_c.loc['std'] = leakage_kde.std()*100

lk_c['mean'] = lk_c.mean(1)
lk_c

In [ ]:
lk_c.loc['mean'].plot()
ylim(2, 4)

Mean per channel (weighted on the number of bursts):


In [ ]:
lk_cw = pd.DataFrame(index=['mean', 'std'], columns=E_do.columns)

lk_cw.loc['mean'] = (nbursts*leakage_kde).sum()/nbursts.sum()*100
lk_cw.loc['std'] = (nbursts*leakage_kde).std()/nbursts.sum()*100

lk_cw['mean'] = lk_cw.mean(1)
lk_cw

In [ ]:
lk_cw.loc['mean'].plot()
ylim(2, 4)

In [ ]:
mch_plot_bg(d7)

NOTE: There is a per-channel trend that cannot be ascribed to the background because we performend a D-emission burst search and selection and the leakage vs ch does not resemble the D-background vs channel curve.

The effect is probably due to slight PDE variations (detectors + optics) that slightly change $\gamma$ on a per-spot basis.

Weighted mean of the weighted mean


In [ ]:
lk = (lk_cw.ix['mean', :-1]*nbursts.mean()).sum()/(nbursts.mean().sum())/100
lk_2 = (lk_sw.ix['mean', :-1]*nbursts.mean(1)).sum()/(nbursts.mean(1).sum())/100

assert np.allclose(lk, lk_2)

print('Mean leakage: %.6f' % lk)